//replicate KKPS estimation

use "$datadir\kkps_dat.dta", clear
set seed 1234

mat R=J(5,2,0)

// estimate the model in LogOLS and compute RMSE, MAE, and plot 
reg	lognetinc ydum1-ydum11 ydum13-ydum20 loginc [aw = hweight] 	if age<65 & landlord != 1
est sto log_mod
mat R[1,1]=_b[loginc]
mat R[5,1]=e(N)
gen indi=e(sample)
keep if indi==1
predict yhatt
predict ress, r
sum ress
gen yhat_log=exp(yhatt+((r(sd))^2)/2)


// calculate RMSE
gen help=(netinc-yhat_log)^2
sum help if indi
scalar rmse_log_s=sqrt(r(mean))
drop help
mat R[3,1]=rmse_log_s

// calculate MAE
gen help=abs(netinc-yhat_log)
sum help if indi
scalar mae_log_s=r(mean)
drop help
mat R[4,1]=mae_log_s

//bootstrap the SEs
mat B=J(500,1,0)
qui forv b=1/500{
	preserve
	bsample 
	reg	lognetinc ydum1-ydum11 ydum13-ydum20 loginc [aw = hweight] 	if age<65 & landlord != 1
	mat B[`b',1]=_b[loginc]
	restore
}

preserve

	svmat B
	sum B, d
	mat R[2,1]=r(sd)

restore


// estimate the model using NLS and compute RMSE, MAE, and plot
poisson netinc ydum1-ydum11 ydum13-ydum20 loginc [iw=hweight] if indi==1
est sto nl_mod
mat R[1,2]=_b[loginc]
mat R[5,2]=e(N)
predict yhat_nl, ir
replace yhat_nl=. if indi!=1

// calculate RMSE
gen help=(netinc-yhat_nl)^2
sum help if indi
scalar rmse_nl_s=sqrt(r(mean))
drop help
mat R[3,2]=rmse_nl_s

// calculate MAE
gen help=abs(netinc-yhat_nl)
sum help if indi
scalar mae_nl_s=r(mean)
drop help
mat R[4,2]=mae_nl_s


//bootstrap the SEs
mat B=J(500,1,0)
qui forv b=1/500{
	preserve
	bsample 
	poisson netinc ydum1-ydum11 ydum13-ydum20 loginc [iw=hweight] if indi==1 , iter(20) diff
	mat B[`b',1]=_b[loginc]
	di in red "b=" `b'
	restore
}

preserve

	svmat B
	sum B, d
	mat R[2,2]=r(sd)

restore


putexcel set "${res}res_KKPS.xlsx", modify
putexcel B2=mat(R)





